***************************************************************************
***************************************************************************
**Replication files for Enns, Moehlecke and Wlezien: "Detecting True 
**Relationships in Time Series Data with Different Orders of Integration"**
**Tables 1-4 on main paper*************************************************
**Last run: Jun 27th, 2021************************************************** 
***************************************************************************
***************************************************************************

version 14.2

*Note: we use the same seed throughout the simulations so that results are 
*directly comparable. We reset the seed for each simulation.


*seed selected based on random.org, b/t 1 and 1000000000
set seed 457090451


*************************************************************************
*************************************************************************
****************************TABLE 1**************************************
*************************************************************************
*************************************************************************


**********************************
*Table 1 layout
**********************************
*generate excel file to store Table 1 output
putexcel set table1.xlsx, sheet(sheet1) replace
*add labels to table
*row labels
putexcel A4 = "$\hat{\alpha}_1$"
putexcel A5 = "$\hat{\alpha^*}_1$"
putexcel A6 = "$\hat{\beta}_1$"
putexcel A7 = "$\hat{\beta}_2$"
putexcel A8 = "$\hat{\beta^*}_2$"

*column labels
putexcel B1 = "ADL"
putexcel B2 = "$\rho = 0.2$"
putexcel B3 = "coef"
putexcel C3 = "%"
putexcel D2 = "$\rho = 0.5$"
putexcel D3 = "coef"
putexcel E3 = "%"
putexcel F2 = "$\rho = 0.8$"
putexcel F3 = "coef"
putexcel G3 = "%"

putexcel H1 = "GCM"
putexcel H2 = "$\rho = 0.2$"
putexcel H3 = "coef"
putexcel I3 = "%"
putexcel J2 = "$\rho = 0.5$"
putexcel J3 = "coef"
putexcel K3 = "%"
putexcel L2 = "$\rho = 0.8$"
putexcel L3 = "coef"
putexcel M3 = "%"

putexcel N1 = "First Difference"
putexcel N2 = "$\rho = 0.2$"
putexcel N3 = "coef"
putexcel O3 = "%"
putexcel P2 = "$\rho = 0.5$"
putexcel P3 = "coef"
putexcel Q3 = "%"
putexcel R2 = "$\rho = 0.8$"
putexcel R3 = "coef"
putexcel S3 = "%"


************************
**T=5000; x1 = I(0), rho=.2, .5, .8
**x2 = I(1)
**y = x1 + x2 
*************************

*************************************************************************
*************************************************************************
*TABLE 1 - ADL
*************************************************************************
*************************************************************************
set seed 457090451

**********
**rho=.2
**********
*program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.2*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2
reg y l.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq



sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_3 = _b[L.x1]
sum _sim_1  _sim_3


*export results from ADL rho = 0.2 to excel file
**$\hat{\alpha}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel B4 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_5) if abs(_sim_1/_sim_5)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel C4 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel B6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1) >=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc'
putexcel C6 = `perc'

**$\hat{\beta}_2$
*ceof
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel B7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel C7 = `perc'



**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2
reg y l.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq

sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_3 = _b[L.x1]
sum _sim_1 _b_x1 _sim_3


*export results from ADL rho = 0.5 to excel file
**$\hat{\alpha}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel D4 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_5) if abs(_sim_1/_sim_5)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel E4 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel D6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1) >=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc'
putexcel E6 = `perc'

**$\hat{\beta}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel D7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel E7 = `perc'


**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2
reg y l.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq



sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_3 = _b[L.x1]
sum _sim_1 _b_x1 _sim_3



*export results from ADL rho = 0.8 to excel file
**$\hat{\alpha}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel F4 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_5) if abs(_sim_1/_sim_5)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel G4 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel F6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1) >=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc'
putexcel G6 = `perc'

**$\hat{\beta}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel F7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel G7 = `perc'



*************************************************************************
*************************************************************************
*TABLE 1 - GECM
*************************************************************************
*************************************************************************
*reset seed
set seed 457090451

**********
**rho=.2
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.2*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2
reg d.y l.y d.x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq


sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_2 = _b[D.x1]
*_sim_3 = _b[L.x1]
sum  _sim_1 _sim_2 _sim_3


*export results from GECM rho = 0.2 to excel file
**$\hat{\alpha^*}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel H5 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_6) if abs(_sim_1/_sim_6)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel I5 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel H6 = `r(mean)', nformat("#0.####")
*%
gen tstat_dx1 = abs(_sim_2/_sim_6) if abs(_sim_2/_sim_6) >=1.96
quietly sum tstat_dx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel I6 = `perc'

**$\hat{\beta^*}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel H8 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel I8 = `perc'



**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2
reg d.y l.y d.x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq



sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_2 = _b[D.x1]
*_sim_3 = _b[L.x1]
sum  _sim_1 _sim_2 _sim_3



*export results from GECM rho = 0.5 to excel file
**$\hat{\alpha^*}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel J5 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_6) if abs(_sim_1/_sim_6)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel K5 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel J6 = `r(mean)', nformat("#0.####")
*%
gen tstat_dx1 = abs(_sim_2/_sim_6) if abs(_sim_2/_sim_6) >=1.96
quietly sum tstat_dx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel K6 = `perc'

**$\hat{\beta^*}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel J8 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel K8 = `perc'


**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2
reg d.y l.y d.x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq



sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_2 = _b[D.x1]
*_sim_3 = _b[L.x1]
sum  _sim_1 _sim_2 _sim_3



*export results from GECM rho = 0.8 to excel file
**$\hat{\alpha^*}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel L5 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_6) if abs(_sim_1/_sim_6)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel M5 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel L6 = `r(mean)', nformat("#0.####")
*%
gen tstat_dx1 = abs(_sim_2/_sim_6) if abs(_sim_2/_sim_6) >=1.96
quietly sum tstat_dx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel M6 = `perc'

**$\hat{\beta^*}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel L8 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel M8 = `perc'


*****************************************************************************
*************************************************************************
*TABLE 1 - FIRST-DIFFERENCE DV
*************************************************************************
*************************************************************************
*reset seed
set seed 457090451


**********
**rho=.2
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.2*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2
reg d.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq

sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_2 = _b[L.x1]
sum _b_x1 _sim_2


*export results from First Difference rho = 0.2 to excel file
**$\hat{\beta^}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel N6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1)>=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc' 
putexcel O6 = `perc'

**$\hat{\beta^}_2$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel N7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_2/_sim_5) if abs(_sim_2/_sim_5) >=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel O7 = `perc'



**********
**rho=.5
**********

program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2
reg d.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq

sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_2 = _b[L.x1]
sum _b_x1 _sim_2


*export results from First Difference rho = 0.5 to excel file
**$\hat{\beta^}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel P6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1)>=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc' 
putexcel Q6 = `perc'

**$\hat{\beta^}_2$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel P7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_2/_sim_5) if abs(_sim_2/_sim_5) >=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel Q7 = `perc'


**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2
reg d.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq

sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_2 = _b[L.x1]
sum _b_x1 _sim_2


*export results from First Difference rho = 0.8 to excel file
**$\hat{\beta^}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel R6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1)>=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc' 
putexcel S6 = `perc'

**$\hat{\beta^}_2$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel R7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_2/_sim_5) if abs(_sim_2/_sim_5) >=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel S7 = `perc'



*************************************************************************
*************************************************************************
****************************TABLE 2**************************************
*************************************************************************
*************************************************************************

**********************************
*Table 2 layout
**********************************
*generate excel file to store Table 2 output
putexcel set table2.xlsx, sheet(sheet1) replace
*add labels to table
putexcel A1 = "$Y I(1), x^s_t I(0)$"
putexcel A2 = "$Y I(0), x^s_t I(0)$"
putexcel A3 = "$Y I(1), x^s_t I(1)$"
putexcel A4 = "$Y I(0), x^s_t I(1)$"


************************
**T=100; x1 = I(0), rho=.5
**x2 = I(1)
**y = x1 + x2 
*************************

*************************************************************************
*************************************************************************
*TABLE 2 - ADL
*************************************************************************
*************************************************************************
*reset the seed
set seed 457090451

**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 100
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
		reg d.y x1 l.x1
		mat P = r(p)
		return scalar pvalue_bg = P[1,1] 
	
	end

	
simul _b _se p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 
				


sum _b_x1 _sim_2
gen tstat_x1_ = abs(_b_x1/_se_x1)
sum tstat_x1_ if tstat_x1_>=1.96
gen tstat_lx1 = abs(_sim_2/_sim_5)
sum tstat_lx1 if tstat_lx1>=1.96
sum _eq2_p_value_df_y if _eq2_p_value_df_y>0.05 
sum _eq2_p_value_df_y if _eq2_p_value_df_y<.05
gen yI1=0
replace yI1=1 if _eq2_p_value_df_y>.05
local fsig_df_y = r(N)/10  
sum _eq2_p_value_df_x1 if _eq2_p_value_df_x1>0.05
sum _eq2_p_value_df_x1 if _eq2_p_value_df_x1<.05
local fsig_df_x1 = r(N)/10 
gen xI1=0
replace xI1=1 if _eq2_p_value_df_x1>.05
				
				
*Produce Table 2 crosstab of Y and X1 characteristics.
tab yI1 xI1
gen y1x1=0
replace y1x1 =1 if yI1==1 & xI1==1
sum _b_x1 _sim_2 if y1x1==1
gen y1x0=0
replace y1x0=1 if yI1==1 & xI1==0
sum _b_x1 _sim_2 if y1x0==1
gen tstat_x1__ = abs(_b_x1/_se_x1)
sum tstat_x1__ if tstat_x1_>=1.96		
				
*Turn crosstabs into matrix to export Y and X1 characteristics into excel				
tab yI1 xI1, matcell(x)			
matrix list x

scalar a = x[2,1]
local a = a/2000
di `a'
putexcel B1 = `a'

scalar b = x[1,1]
local b = b/2000
di `b'
putexcel B2 = `b'

scalar c = x[2,2]
local c = c/2000
di `c'
putexcel B3 = `c'

scalar d = x[1,2]
local d = d/2000
di `d'
putexcel B4 = `d'



*************************************************************************
*************************************************************************
****************************TABLE 3**************************************
*************************************************************************
*************************************************************************


**********************************
*Table 3 layout
**********************************
*generate excel file to store Table 3 output
putexcel set table3.xlsx, sheet(sheet1) replace
*add labels to table

putexcel B1 = "First Difference, Y = I(1), $x^s_t$ = I (0)"
putexcel B2 = "coef"
putexcel C2 = "%"
putexcel D1 = "ADL, Y = I(0), $x^s_t$ = I(0)"
putexcel D2 = "coef"
putexcel E2 = "%"
putexcel A3 = "$\hat{\alpha}_1$"
putexcel A4 = "$\hat{\beta}_1$"
putexcel A5 = "$\hat{\beta}_2$"
putexcel A6 = "% cases"


*Estimation of relationship between DV and IV where former is I(1) and latter is I(0)* 

************************
**T=100; x1 = I(0), rho=.5
**x2 = I(1)
**y = x1 + x2 
*************************

*************************************************************************
*************************************************************************
*Differenced DV
*************************************************************************
*************************************************************************
*reset the seed
set seed 457090451

**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 100
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
		reg d.y x1 l.x1
		mat P = r(p)
		return scalar pvalue_bg = P[1,1] 
		
	end

	
				simul _b _se p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				sum _b_x1 _sim_2
				gen tstat_x1 = abs(_b_x1/_se_x1)
				sum tstat_x1 if tstat_x1>=1.96
				gen tstat_lx1 = abs(_sim_2/_sim_5)
				sum tstat_lx1 if tstat_lx1>=1.96
				sum _eq2_p_value_df_y if _eq2_p_value_df_y>0.05 
				sum _eq2_p_value_df_y if _eq2_p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if _eq2_p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				sum _eq2_p_value_df_x1 if _eq2_p_value_df_x1>0.05
				sum _eq2_p_value_df_x1 if _eq2_p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if _eq2_p_value_df_x1>.05
				tab yI1 xI1
				gen y1x1=0
				replace y1x1 =1 if yI1==1 & xI1==1
				sum _b_x1 _sim_2 if y1x1==1
				gen y1x0=0
				replace y1x0=1 if yI1==1 & xI1==0
				sum _b_x1 _sim_2 if y1x0==1

				sum tstat_x1 if y1x0==1			
				sum tstat_lx1 if y1x0==1

*Mean estimate of IV coefficients across simulations and frequency of statistical significance*
				sum _b_x1 _sim_2 if y1x0==1 & tstat_x1>1.96



				
*export results from First Differenced DV rho = 0.5 to excel file

**$\hat{\beta^}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel B4 = `r(mean)', nformat("#0.####")
*%
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc'
putexcel C4 = `perc'


				
**$\hat{\beta^}_2$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel B5 = `r(mean)', nformat("#0.####")
*%
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel C5 = `perc'



*Estimation of relationship between DV and IV where former is I(0) and latter is I(0)* 

************************
**T=100; x1 = I(0), rho=.5
**x2 = I(1)
**y = x1 + x2 
*************************


*************************************************************************
*************************************************************************
*ADL
*************************************************************************
*************************************************************************
*reset the seed
set seed 457090451

**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 100
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
		reg y l.y x1 l.x1
		mat P = r(p)
		return scalar pvalue_bg = P[1,1] 
		
	end

	
				simul _b _se p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				sum _sim_1 _b_x1 _sim_3
				gen tstat_ly = abs(_sim_1/_sim_5)
				sum tstat_ly if tstat_ly>=1.96
				gen tstat_x1 = abs(_b_x1/_se_x1)
				sum tstat_x1 if tstat_x1>=1.96
				gen tstat_lx1 = abs(_sim_3/_sim_7)
				sum tstat_lx1 if tstat_lx1>=1.96
				sum _eq2_p_value_df_y if _eq2_p_value_df_y>0.05 
				sum _eq2_p_value_df_y if _eq2_p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if _eq2_p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				sum _eq2_p_value_df_x1 if _eq2_p_value_df_x1>0.05
				sum _eq2_p_value_df_x1 if _eq2_p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if _eq2_p_value_df_x1>.05
				tab yI1 xI1
				gen y1x1=0
				replace y1x1 =1 if yI1==1 & xI1==1
				sum _b_x1 _sim_3 if y1x1==1
				gen y1x0=0
				replace y1x0=1 if yI1==1 & xI1==0
				gen y0x0=0
				replace y0x0=1 if yI1==0 & xI1==0
				sum _sim_1 _b_x1 _sim_3 if y0x0==1
				sum tstat_ly if y0x0==1
				sum tstat_x1 if y0x0==1			
				sum tstat_lx1 if y0x0==1
				
*Mean estimate of IV coefficients across simulations and frequency of statistical significance*
				sum _sim_1 _b_x1 _sim_3 if y0x0==1 & tstat_x1>1.96	
				

				
*export results from ADL rho = 0.5 to excel file

**$\hat{\alpha^}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel D3 = `r(mean)', nformat("#0.####")
*%
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc'
putexcel E3 = `perc'
				
**$\hat{\beta^}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel D4 = `r(mean)', nformat("#0.####")
*%
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc'
putexcel E4 = `perc'

				
**$\hat{\beta^}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel D5 = `r(mean)', nformat("#0.####")
*%
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel E5 = `perc'


*export the percentages that correctly identify the series to excel

*Turn crosstabs into matrix to export Y and X1 characteristics into excel				
tab yI1 xI1, matcell(x)			
matrix list x

scalar a = x[2,1]
local a = a/2000
di `a'
putexcel C6 = `a'

scalar b = x[1,1]
local b = b/2000
di `b'
putexcel E6 = `b'


*************************************************************************
*************************************************************************
****************************TABLE 4**************************************
*************************************************************************
*************************************************************************

***************
*Table 4 layout
***************

**********************************
*generate excel file to store Table 4 output
putexcel set table4.xlsx, sheet(sheet1) replace
*add labels to table
*row labels
putexcel A3 = "Y I(1), X I(0)"
putexcel A4 = "Y I(0), X I(0)"
putexcel A5 = "Y I(1), X I(1)"
putexcel A6 = "Y I(0), X I(1)"

*column labels
putexcel B1 = "T = 50"
putexcel B2 = "$\rho = 0.2$"
putexcel C2 = "$\rho = 0.5$"
putexcel D2 = "$\rho = 0.8$"

putexcel E1 = "T = 100"
putexcel E2 = "$\rho = 0.2$"
putexcel F2 = "$\rho = 0.5$"
putexcel G2 = "$\rho = 0.8$"

putexcel H1 = "T = 200"
putexcel H2 = "$\rho = 0.2$"
putexcel I2 = "$\rho = 0.5$"
putexcel J2 = "$\rho = 0.8$"

************************
**T=50; x1 = I(0), rho=.2
**x2 = I(1)
**y = x1 + x2 
*************************

*reset the seed
set seed 457090451
**********
**rho=.2
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 50
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.2*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
	end

	

				simul p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				
*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.05 
				sum p_value_df_y if p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.05
				sum p_value_df_x1 if p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if p_value_df_x1>.05

*Tabulate characteristics of DV and IV*
				tab yI1 xI1, matcell(x)
				
*Turn crosstab into matrix to export DV and IV characteristics into excel

matrix list x

scalar a = x[2,1]/2000*100
di a
putexcel B3 = a

scalar b = x[1,1]/2000*100
di a
putexcel B4 = b

scalar c = x[2,2]/2000*100
di c
putexcel B5 = c

scalar d = x[1,2]/2000*100
di d
putexcel B6 = d




************************
**T=100; x1 = I(0), rho=.2
**x2 = I(1)
**y = x1 + x2 
*************************

*reset the seed
set seed 457090451
**********
**rho=.2
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 100
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.2*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
	end

	

				simul p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				
*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.05 
				sum p_value_df_y if p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.05
				sum p_value_df_x1 if p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if p_value_df_x1>.05

*Tabulate characteristics of DV and IV*
			
				tab yI1 xI1, matcell(x)
				
*Turn crosstab into matrix to export DV and IV characteristics into excel


matrix list x

scalar a = x[2,1]/2000*100
di a
putexcel E3 = a

scalar b = x[1,1]/2000*100
di a
putexcel E4 = b

scalar c = x[2,2]/2000*100
di c
putexcel E5 = c

scalar d = x[1,2]/2000*100
di d
putexcel E6 = d


************************
**T=200; x1 = I(0), rho=.2
**x2 = I(1)
**y = x1 + x2 
*************************

*reset the seed
set seed 457090451


**********
**rho=.2
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 200
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.2*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
	end

	

				simul p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				
*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.05 
				sum p_value_df_y if p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.05
				sum p_value_df_x1 if p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if p_value_df_x1>.05

*Tabulate characteristics of DV and IV*
				tab yI1 xI1, matcell (x)
				
*Turn crosstab into matrix to export DV and IV characteristics into excel
				
				
matrix list x

scalar a = x[2,1]/2000*100
di a
putexcel H3 = a

scalar b = x[1,1]/2000*100
di a
putexcel H4 = b

scalar c = x[2,2]/2000*100
di c
putexcel H5 = c

scalar d = x[1,2]/2000*100
di d
putexcel H6 = d


************************
**T=50; x1 = I(0), rho=.5
**x2 = I(1)
**y = x1 + x2 
*************************

*reset the seed
set seed 457090451

**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 50
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
	end

	

				simul p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				
*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.05 
				sum p_value_df_y if p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.05
				sum p_value_df_x1 if p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if p_value_df_x1>.05

*Tabulate characteristics of DV and IV*
				tab yI1 xI1, matcell(x)

				
*Turn crosstab into matrix to export DV and IV characteristics into excel
				
matrix list x

scalar a = x[2,1]/2000*100
di a
putexcel C3 = a

scalar b = x[1,1]/2000*100
di a
putexcel C4 = b

scalar c = x[2,2]/2000*100
di c
putexcel C5 = c

scalar d = x[1,2]/2000*100
di d
putexcel C6 = d
			

************************
**T=100; x1 = I(0), rho=.5
**x2 = I(1)
**y = x1 + x2 
*************************

*reset the seed
set seed 457090451


**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 100
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
	end

	

				simul p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				
*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.05 
				sum p_value_df_y if p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.05
				sum p_value_df_x1 if p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if p_value_df_x1>.05

*Tabulate characteristics of DV and IV*
				tab yI1 xI1, matcell(x)

*Turn crosstab into matrix to export DV and IV characteristics into excel
				
matrix list x

scalar a = x[2,1]/2000*100
di a
putexcel F3 = a

scalar b = x[1,1]/2000*100
di a
putexcel F4 = b

scalar c = x[2,2]/2000*100
di c
putexcel F5 = c

scalar d = x[1,2]/2000*100
di d
putexcel F6 = d


************************
**T=200; x1 = I(0), rho=.5
**x2 = I(1)
**y = x1 + x2 
*************************

*reset the seed
set seed 457090451


**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 200
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
	end

	

				simul p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				
*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.05 
				sum p_value_df_y if p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.05
				sum p_value_df_x1 if p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if p_value_df_x1>.05

*Tabulate characteristics of DV and IV*
				tab yI1 xI1, matcell (x)

*Turn crosstab into matrix to export DV and IV characteristics into excel

				
matrix list x

scalar a = x[2,1]/2000*100
di a
putexcel I3 = a

scalar b = x[1,1]/2000*100
di a
putexcel I4 = b

scalar c = x[2,2]/2000*100
di c
putexcel I5 = c

scalar d = x[1,2]/2000*100
di d
putexcel I6 = d

	

************************
**T=50; x1 = I(0), rho=.8
**x2 = I(1)
**y = x1 + x2 
*************************

*reset the seed
set seed 457090451


**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 50
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
	end

	

				simul p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				
*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.05 
				sum p_value_df_y if p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.05
				sum p_value_df_x1 if p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if p_value_df_x1>.05

*Tabulate characteristics of DV and IV*
				tab yI1 xI1, matcell(x)

*Turn crosstab into matrix to export DV and IV characteristics into excel
				
matrix list x

scalar a = x[2,1]/2000*100
di a
putexcel D3 = a

scalar b = x[1,1]/2000*100
di b
putexcel D4 = b

scalar c = x[2,2]/2000*100
di c
putexcel D5 = c

scalar d = x[1,2]/2000*100
di d
putexcel D6 = d


*using critical value of 0.1 (footnote 14)


*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.1 
				sum p_value_df_y if p_value_df_y<.1
				gen yI1_=0
				replace yI1_=1 if p_value_df_y>.1
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.1
				sum p_value_df_x1 if p_value_df_x1<.1
				local fsig_df_x1 = r(N)/10 
				gen xI1_=0
				replace xI1_=1 if p_value_df_x1>.1

*Tabulate characteristics of DV and IV*
				tab yI1_ xI1_, matcell(x)

				
*Turn crosstab into matrix 
				
matrix list x

scalar a = x[2,1]/2000*100
di a

scalar b = x[1,1]/2000*100
di b

scalar c = x[2,2]/2000*100
di c

scalar d = x[1,2]/2000*100
di d





************************
**T=100; x1 = I(0), rho=.8
**x2 = I(1)
**y = x1 + x2 
*************************

*reset the seed
set seed 457090451


**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 100
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
	end

	

				simul p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				
*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.05 
				sum p_value_df_y if p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.05
				sum p_value_df_x1 if p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if p_value_df_x1>.05

*Tabulate characteristics of DV and IV*
				tab yI1 xI1, matcell(x)
				
*Turn crosstab into matrix to export DV and IV characteristics into excel

matrix list x

scalar a = x[2,1]/2000*100
di a
putexcel G3 = a

scalar b = x[1,1]/2000*100
di a
putexcel G4 = b

scalar c = x[2,2]/2000*100
di c
putexcel G5 = c

scalar d = x[1,2]/2000*100
di d
putexcel G6 = d


************************
**T=200; x1 = I(0), rho=.8
**x2 = I(1)
**y = x1 + x2 
*************************

*reset the seed
set seed 457090451


**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 200
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
	end

	

				simul p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				
*Diagnose I(1) of DV*
				sum p_value_df_y if p_value_df_y>0.05 
				sum p_value_df_y if p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				
*Diagnose I(1) of IV*
				sum p_value_df_x1 if p_value_df_x1>0.05
				sum p_value_df_x1 if p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if p_value_df_x1>.05

*Tabulate characteristics of DV and IV*
				tab yI1 xI1, matcell(x)

*Turn crosstab into matrix to export DV and IV characteristics into excel

matrix list x

scalar a = x[2,1]/2000*100
di a
putexcel J3 = a

scalar b = x[1,1]/2000*100
di a
putexcel J4 = b

scalar c = x[2,2]/2000*100
di c
putexcel J5 = c

scalar d = x[1,2]/2000*100
di d
putexcel J6 = d




**************************************************************************************
**************************************************************************************
****************************TABLE 4 - SUPPLEMENT**************************************
**************************************************************************************
**************************************************************************************



*Estimation of relationship between DV and IV where former seems I(1) and latter seems I(1)* 

************************
**T=50; x1 = I(0), rho=.8
**x2 = I(1)
**y = x1 + x2 
*************************

*************************************************************************
*************************************************************************
*Differenced specification
*************************************************************************
*************************************************************************
*keeping the same seed as used above

**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
syntax [, i(real 0) j(real 0) k(real 0) l(real 0) m(real 0) n(real 0) p(real 1) q(real 1) r(real 1) s(real 0) v(real 0) w(real 0) obs(integer 5000)] 
*#d
drop _all
set obs 50
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
gen dx1=x1-l.x1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that is a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2


		
		reg d.y l.y  
		predict resid_y, r 
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		while `i'<0.05 {  
		local j =`j'+ 1  
		reg d.y l.y l(1/`j')d.y  
		predict resid_y, r
		wntestq resid_y 
		local i=r(p)  
		drop resid_y 
		} 
		
		
		reg d.x1 l.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		while `k'<0.05 {  
		local l=`l'+ 1  
		reg d.x1 l.x1 l(1/`l')d.x1  
		predict resid_x1, r 
		wntestq resid_x1 
		local k=r(p)  
		drop resid_x1 
		} 
		
	
		
		dfuller y, lags(`j')  
		
		return scalar p_value_df_y=r(p)
		return scalar df_y=r(Zt)
		
		dfuller x1, lags(`l') 
		return scalar p_value_df_x1=r(p) 
		return scalar df_x1=r(Zt)
		
		reg d.y dx1
		mat P = r(p)
		return scalar pvalue_bg = P[1,1] 
		
	end

	
				simul _b _se p_value_df_y=r(p_value_df_y) p_value_df_x1=r(p_value_df_x1), reps(2000) nodots: combined_noq 

				sum _b_dx1 _se_dx1

				sum _eq2_p_value_df_y if _eq2_p_value_df_y>0.05 
				sum _eq2_p_value_df_y if _eq2_p_value_df_y<.05
				gen yI1=0
				replace yI1=1 if _eq2_p_value_df_y>.05
				local fsig_df_y = r(N)/10  
				sum _eq2_p_value_df_x1 if _eq2_p_value_df_x1>0.05
				sum _eq2_p_value_df_x1 if _eq2_p_value_df_x1<.05
				local fsig_df_x1 = r(N)/10 
				gen xI1=0
				replace xI1=1 if _eq2_p_value_df_x1>.05
				tab yI1 xI1
				gen y1x0=0
				replace y1x0=1 if yI1==1 & xI1==0
				gen y1x1=0
				replace y1x1 =1 if yI1==1 & xI1==1
				sum _b_dx1 if y1x1==1
				gen tstat_dx1 = abs(_b_dx1/_se_dx1)
				sum tstat_dx1 if tstat_dx1>=1.96	
				sum tstat_dx1 if y1x1==1		
				sum _b_dx1 tstat_dx1 if y1x1==1 & tstat_dx1>1.96
	



